<!doctype html>
<!-- Standards mode is required by the Bootstrap 3 styles this page loads; lang marks the Simplified Chinese content for assistive technology. -->
<html lang="zh-Hans">
 <head>
  <meta charset="utf-8"/>
  <!-- Zoom is deliberately left enabled so readers can enlarge the article text. -->
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <title>
   新手福利：Apache Spark入门攻略  | 数螺 | NAUT IDEA
  </title>
  <!-- Base Bootstrap first, optional theme second, so the theme's overrides win the cascade. -->
  <link href="http://cdn.bootcss.com/bootstrap/3.3.6/css/bootstrap.min.css" rel="stylesheet"/>
  <link href="http://cdn.bootcss.com/bootstrap/3.3.6/css/bootstrap-theme.min.css" rel="stylesheet"/>
  <style type="text/css">
   /* ---- Article body: rules scoped to the #xmain container ---- */

   /* Images never overflow the column and get vertical spacing. */
   #xmain img {
     display: block;
     max-width: 100%;
     margin-top: 10px;
     margin-bottom: 10px;
   }

   /* Body text. */
   #xmain p {
     margin-top: 20px;
     font-size: 16px;
     line-height: 150%;
   }

   /* Heading scale inside the article. */
   #xmain h2 { font-size: 24px; }
   #xmain h3 { font-size: 20px; }
   #xmain h4 { font-size: 18px; }

   /* ---- Site banner ---- */

   .header {
     margin-bottom: 20px;
     color: #ffffff;
     background-color: #0099ff;
   }

   /* Banner text sits inline next to the logo, vertically centred. */
   .header p {
     display: inline-block;
     vertical-align: middle;
     margin: 0px;
     padding: 10px 0;
     font-size: 16px;
   }

   .header a {
     color: white;
   }

   .header img {
     height: 25px;
   }
  </style>
  <script src="http://cdn.bootcss.com/jquery/3.0.0/jquery.min.js">
  </script>
  <script src="http://nautstatic-10007657.file.myqcloud.com/static/css/readability.min.js" type="text/javascript">
  </script>
  <script type="text/javascript">
   // Once the DOM is ready, run Readability over a clone of the document and
   // replace the contents of #xmain with the extracted article markup.
   // If extraction yields nothing, the markup already inside #xmain is left
   // untouched so the page still shows the captured content.
   $(document).ready(function() {
                 // Location descriptor of the original article, handed to Readability.
                 var uri = {
                  spec: "http://dataunion.org/20001.html",
                  host: "http://dataunion.org",
                  prePath: "http://dataunion.org",
                  scheme: "http",
                  pathBase: "http://dataunion.org/"
                 };

                 var target = document.getElementById("xmain");
                 if (!target) {
                   return;
                 }

                 // Parse a clone so the parser's DOM rewriting does not touch the live page.
                 var documentClone = document.cloneNode(true);
                 var article = new Readability(uri, documentClone).parse();

                 // Guard against a failed extraction instead of dereferencing
                 // a missing result.
                 if (!article || !article.content) {
                   return;
                 }

                 target.innerHTML = article.content;
                });
  </script>
  <!-- 1466457516: Accept with keywords: (title(0.166666666667):社区,入门,福利,数盟,新手,Spark, topn(0.433333333333):社区,入门,数盟,行业资讯,内存,延时,数据挖掘,洞见,示例,Python,计算,文章,数据处理,Spark,排序,数据,节点,编程,用户,Hadoop,命令,计数器,spark,海量,代码,用例,集群,交互式,文本,分布式).-->
 </head>
 <body onload="">
  <!-- Site banner: logo and site name on the left, tagline on the right (hidden on extra-small screens). -->
  <div class="header">
   <div class="container">
    <div class="row">
     <div class="col-xs-6 col-sm-6 text-left">
      <!-- Logo and site name share one link; the logo is decorative next to the text, hence the empty alt. -->
      <a href="/databee">
       <img alt="" src="http://nautidea-10007657.cos.myqcloud.com/logo_white.png"/>
       <p>
        数螺
       </p>
      </a>
     </div>
     <div class="hidden-xs col-sm-6 text-right">
      <p>
       致力于数据科学的推广和知识传播
      </p>
     </div>
    </div>
   </div>
  </div>
  <div class="container text-center">
   <h1>
    新手福利：Apache Spark入门攻略
   </h1>
  </div>
  <div class="container" id="xmain">
   ﻿﻿
   <title>
    新手福利：Apache Spark入门攻略 | 数盟社区
   </title>
   <!-- All in One SEO Pack 2.2.7.6.2 by Michael Torbert of Semper Fi Web Design[32,68] -->
   <!-- /all in one seo pack -->
   <!--
<div align="center">
<a href="http://strata.oreilly.com.cn/hadoop-big-data-cn?cmp=mp-data-confreg-home-stcn16_dataunion_pc" target="_blank"><img src="http://dataunion.org/wp-content/uploads/2016/05/stratabj.jpg"/ ></a>
</div>
-->
   <header id="header-web">
    <div class="header-main">
     <hgroup class="logo">
      <h1>
       <a href="http://dataunion.org/" rel="home" title="数盟社区">
        <img src="http://dataunion.org/wp-content/themes/yzipi/images/logo.png"/>
       </a>
      </h1>
     </hgroup>
     <!--logo-->
     <nav class="header-nav">
      <ul class="menu" id="menu-%e4%b8%bb%e8%8f%9c%e5%8d%95">
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-71" id="menu-item-71">
        <a href="http://dataunion.org/category/events" title="events">
         活动
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-22457" id="menu-item-22457">
          <a href="http://dataunion.org/2016timeline">
           2016档期
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-22459" id="menu-item-22459">
          <a href="http://dataunion.org/category/parterc">
           合作会议
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-post-ancestor current-menu-parent current-post-parent menu-item-has-children menu-item-20869" id="menu-item-20869">
        <a href="http://dataunion.org/category/tech" title="articles">
         文章
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20867" id="menu-item-20867">
          <a href="http://dataunion.org/category/tech/base" title="base">
           基础架构
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3302" id="menu-item-3302">
          <a href="http://dataunion.org/category/tech/ai" title="ai">
           人工智能
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3303" id="menu-item-3303">
          <a href="http://dataunion.org/category/tech/analysis" title="analysis">
           数据分析
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21920" id="menu-item-21920">
          <a href="http://dataunion.org/category/tech/dm">
           数据挖掘
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3314" id="menu-item-3314">
          <a href="http://dataunion.org/category/tech/viz" title="viz">
           可视化
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3305" id="menu-item-3305">
          <a href="http://dataunion.org/category/tech/devl" title="devl">
           编程语言
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-20876" id="menu-item-20876">
        <a href="http://dataunion.org/category/industry">
         行业
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-16328" id="menu-item-16328">
          <a href="http://dataunion.org/category/industry/case" title="case">
           行业应用
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-2112" id="menu-item-2112">
          <a href="http://dataunion.org/category/industry/demo" title="demo">
           Demo展示
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21562" id="menu-item-21562">
          <a href="http://dataunion.org/category/industry/news">
           行业资讯
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-311" id="menu-item-311">
        <a href="http://dataunion.org/category/sources" title="sources">
         资源
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20870" id="menu-item-20870">
        <a href="http://dataunion.org/category/books" title="book">
         图书
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21363" id="menu-item-21363">
        <a href="http://dataunion.org/category/training">
         课程
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-21853" id="menu-item-21853">
        <a href="http://dataunion.org/category/jobs">
         职位
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-22050" id="menu-item-22050">
          <a href="http://dataunion.org/category/career">
           职业规划
          </a>
         </li>
        </ul>
       </li>
      </ul>
     </nav>
     <!--header-nav-->
    </div>
   </header>
   <!--header-web-->
   <div id="main">
    <div id="soutab">
     <form action="http://dataunion.org/" class="search" method="get">
     </form>
    </div>
    <div id="container">
     <nav id="mbx">
      当前位置：
      <a href="http://dataunion.org">
       首页
      </a>
      &gt;
      <a href="http://dataunion.org/category/tech">
       文章
      </a>
      &gt;  正文
     </nav>
     <!--mbx-->
     <article class="content">
      <header class="contenttitle">
       <div class="mscc">
        <h1 class="mscctitle">
         <a href="http://dataunion.org/20001.html">
          新手福利：Apache Spark入门攻略
         </a>
        </h1>
        <address class="msccaddress ">
         <em>
          2,116 次阅读 -
         </em>
         <a href="http://dataunion.org/category/tech" rel="category tag">
          文章
         </a>
        </address>
       </div>
      </header>
      <div class="content-text">
       <p>
        作者
        <span class="ago">
         Ashwini Kuntamukkala  出处：
         <a href="http://www.csdn.net/article/2015-07-10/2825184">
          CSDN
         </a>
        </span>
       </p>
       <p>
        本文聚焦Apache Spark入门，了解其在大数据领域的地位，覆盖Apache Spark的安装及应用程序的建立，并解释一些常见的行为和操作。
       </p>
       <p>
        <b>
         一、 为什么要使用Apache Spark
        </b>
       </p>
       <p>
        时下，我们正处在一个“大数据”的时代，每时每刻，都有各种类型的数据被生产。而在此之外，数据增幅的速度也在显著增加。从广义上看，这些数据包含交易数据、社交媒体内容（比如文本、图像和视频）以及传感器数据。那么，为什么要在这些内容上投入如此多精力，其原因无非就是从海量数据中提取洞见可以对生活和生产实践进行很好的指导。
       </p>
       <p>
        在几年前，只有少部分公司拥有足够的技术力量和资金去储存大量数据，并对其进行挖掘从而获得洞见。然而，被雅虎2009年开源的Apache Hadoop对这一状况产生了颠覆性的冲击——通过使用商用服务器组成的集群大幅度地降低了海量数据处理的门槛。因此，许多行业（比如Health care、Infrastructure、Finance、Insurance、Telematics、Consumer、Retail、Marketing、E-commerce、Media、Manufacturing和Entertainment）开始了Hadoop的征程，走上了海量数据提取价值的道路。着眼Hadoop，其主要提供了两个方面的功能：
       </p>
       <ul>
        <li>
         通过水平扩展商用主机，HDFS提供了一个廉价的方式对海量数据进行容错存储。
        </li>
        <li>
         MapReduce计算范例，提供了一个简单的编程模型来挖掘数据并获得洞见。
        </li>
       </ul>
       <p>
        下图展示了MapReduce的数据处理流程：在一个典型的Hadoop job中，一个Map-Reduce step的输出将作为下一个step的输入。
       </p>
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f8e8b69e65_middle.jpg?_=21288"/>
       </p>
       <p>
        在整个过程中，中间结果会借助磁盘传递，因此对比计算，大量的Map-Reduce作业都受限于IO。然而对于ETL、数据整合和清理这样的用例来说，IO约束并不会产生很大的影响，因为这些场景对数据处理时间往往不会有较高的需求。然而，在现实世界中，同样存在许多对延时要求较为苛刻的用例，比如：
       </p>
       <ol>
        <li>
         对流数据进行处理来做近实时分析。举个例子，通过分析点击流数据做视频推荐，从而提高用户的参与度。在这个用例中，开发者必须在精度和延时之间做平衡。
        </li>
        <li>
         在大型数据集上进行交互式分析，数据科学家可以在数据集上做ad-hoc查询。
        </li>
       </ol>
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f8eae3ea93_middle.jpg?_=56185"/>
       </p>
       <p>
        毫无疑问，历经数年发展，Hadoop生态圈中的丰富工具已深受用户喜爱，然而这里仍然存在众多问题给使用带来了挑战：
       </p>
       <p>
        <b>
         1.
        </b>
        每个用例都需要多个不同的技术堆栈来支撑，在不同使用场景下，大量的解决方案往往捉襟见肘。
       </p>
       <p>
        <b>
         2.
        </b>
        在生产环境中机构往往需要精通数门技术。
       </p>
       <p>
        <b>
         3.
        </b>
        许多技术存在版本兼容性问题。
       </p>
       <p>
        <b>
         4.
        </b>
        无法在并行job中更快地共享数据。
       </p>
       <p>
        而通过Apache Spark，上述问题迎刃而解！Apache Spark是一个轻量级的内存集群计算平台，通过不同的组件来支撑批、流和交互式用例，如下图。
       </p>
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f8ed32b003_middle.jpg?_=32356"/>
       </p>
       <p>
        <b>
         二、 关于Apache Spark
        </b>
       </p>
       <p>
        Apache Spark是个开源和兼容Hadoop的集群计算平台。由加州大学伯克利分校的AMPLabs开发，作为Berkeley Data Analytics Stack（BDAS）的一部分，当下由大数据公司Databricks保驾护航，更是Apache旗下的顶级项目，下图显示了Apache Spark堆栈中的不同组件。
       </p>
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f8ef008026_middle.jpg?_=54951"/>
       </p>
       <p>
        <b>
         Apache Spark的5大优势：
        </b>
       </p>
       <p>
        1.更高的性能，因为数据被加载到集群主机的分布式内存中。数据可以被快速的转换迭代，并缓存用以后续的频繁访问需求。很多对Spark感兴趣的朋友可能也会听过这样一句话——在数据全部加载到内存的情况下，Spark可以比Hadoop快100倍，在内存不够存放所有数据的情况下快Hadoop 10倍。
       </p>
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f8f04290c8.jpg"/>
       </p>
       <p>
        <b>
         2.
        </b>
        通过建立在Java、Scala、Python、SQL（应对交互式查询）的标准API以方便各行各业使用，同时还含有大量开箱即用的机器学习库。
       </p>
       <p>
        <b>
         3.
        </b>
        与现有Hadoop v1 (SIMR) 和2.x (YARN) 生态兼容，因此机构可以进行无缝迁移。
       </p>
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f8f1e70915_middle.jpg?_=57231"/>
       </p>
       <p>
        <b>
         4.
        </b>
        方便下载和安装。方便的shell（REPL: Read-Eval-Print-Loop）可以对API进行交互式的学习。
       </p>
       <p>
        <b>
         5.
        </b>
        借助高等级的架构提高生产力，从而可以将精力放到计算上。
       </p>
       <p>
        同时，Apache Spark由Scala实现，代码非常简洁。
       </p>
       <p>
        <b>
         三、安装Apache Spark
        </b>
       </p>
       <p>
        下表列出了一些重要链接和先决条件：
       </p>
       <table cellpadding="0" cellspacing="0">
        <tbody>
         <tr>
          <td class="left_td_colored">
           Current Release
          </td>
          <td class="left_td_colored">
           1.0.1 @
           <a href="http://d3kbcqa49mib13.cloudfront.net/spark-1.0.1.tgz">
            http://d3kbcqa49mib13.cloudfront.net/spark-1.0.1.tgz
           </a>
          </td>
         </tr>
         <tr>
          <td class="right_td_colored">
           Downloads Page
          </td>
          <td class="right_td_colored">
           <a href="https://spark.apache.org/downloads.html">
            https://spark.apache.org/downloads.html
           </a>
          </td>
         </tr>
         <tr>
          <td class="left_td_colored">
           JDK Version (Required)
          </td>
          <td class="left_td_colored">
           1.6 or higher
          </td>
         </tr>
         <tr>
          <td class="right_td_colored">
           Scala Version (Required)
          </td>
          <td class="right_td_colored">
           2.10 or higher
          </td>
         </tr>
         <tr>
          <td class="left_td_colored">
           Python (Optional)
          </td>
          <td class="left_td_colored">
           [2.6, 3.0)
          </td>
         </tr>
         <tr>
          <td class="right_td_colored">
           Simple Build Tool (Required)
          </td>
          <td class="right_td_colored">
           <a href="http://www.scala-sbt.org">
            http://www.scala-sbt.org
           </a>
          </td>
         </tr>
         <tr>
          <td class="left_td_colored">
           Development Version
          </td>
          <td class="left_td_colored">
           <a href="https://github.com/apache/spark">
            git clone git://github.com/apache/spark.git
           </a>
          </td>
         </tr>
         <tr>
          <td class="right_td_colored">
           Building Instructions
          </td>
          <td class="right_td_colored">
           <a href="https://spark.apache.org/docs/latest/building-with-maven.html">
            https://spark.apache.org/docs/latest/building-with-maven.html
           </a>
          </td>
         </tr>
         <tr>
          <td class="left_td_colored">
           Maven
          </td>
          <td class="left_td_colored">
           3.0 or higher
          </td>
         </tr>
        </tbody>
       </table>
       <p>
        如图6所示，Apache Spark的部署方式包括standalone、Hadoop V1 SIMR、Hadoop 2 YARN/Mesos。使用Apache Spark需要一定的Java、Scala或Python知识。这里，我们将专注standalone配置下的安装和运行。
       </p>
       <p>
        1.安装JDK 1.6+、Scala 2.10+、Python [2.6, 3.0) 和sbt
       </p>
       <p>
        2.下载Apache Spark 1.0.1 Release
       </p>
       <p>
        3.在指定目录下Untar和Unzip spark-1.0.1.tgz
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da616381617518" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          akuntamukkala@localhost~/Downloads$ pwd
/Users/akuntamukkala/Downloads
akuntamukkala@localhost~/Downloads$ tar -zxvf spark-1.0.1.tgz -C /Users/akuntamukkala/spark
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da616381617518-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da616381617518-2">
               2
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da616381617518-1">
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-sy">
                @
               </span>
               <span class="crayon-v">
                localhost
               </span>
               <span class="crayon-o">
                ~
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                Downloads
               </span>
               <span class="crayon-sy">
                $
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                pwd
               </span>
               <span class="crayon-h">
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da616381617518-2">
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                Users
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-e">
                Downloads
               </span>
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-sy">
                @
               </span>
               <span class="crayon-v">
                localhost
               </span>
               <span class="crayon-o">
                ~
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                Downloads
               </span>
               <span class="crayon-sy">
                $
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                tar
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-e">
                zxvf
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                1.0.1.tgz
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-v">
                C
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                Users
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0019 seconds] -->
       <p>
        4.运行sbt建立Apache Spark
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da627289554376" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          akuntamukkala@localhost~/spark/spark-1.0.1$ pwd
/Users/akuntamukkala/spark/spark-1.0.1
akuntamukkala@localhost~/spark/spark-1.0.1$ sbt/sbt assembly
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da627289554376-1">
               1
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da627289554376-1">
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-sy">
                @
               </span>
               <span class="crayon-v">
                localhost
               </span>
               <span class="crayon-o">
                ~
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-cn">
                1.0.1
               </span>
               <span class="crayon-sy">
                $
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                pwd
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                Users
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-cn">
                1.0.1
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-sy">
                @
               </span>
               <span class="crayon-v">
                localhost
               </span>
               <span class="crayon-o">
                ~
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-cn">
                1.0.1
               </span>
               <span class="crayon-sy">
                $
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                sbt
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-e">
                sbt
               </span>
               <span class="crayon-v">
                assembly
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0019 seconds] -->
       <p>
        5.发布Scala的Apache Spark standalone REPL
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da62e601805519" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          /Users/akuntamukkala/spark/spark-1.0.1/bin/spark-shell
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da62e601805519-1">
               1
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da62e601805519-1">
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                Users
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-cn">
                1.0.1
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                bin
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-v">
                shell
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0009 seconds] -->
       <p>
        如果是Python
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da634551585770" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          /Users/akuntamukkala/spark/spark-1.0.1/bin/pyspark
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da634551585770-1">
               1
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da634551585770-1">
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                Users
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-cn">
                1.0.1
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                bin
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                pyspark
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0009 seconds] -->
       <p>
        6.查看SparkUI @
        <a href="http://localhost:4040/">
         http://localhost:4040
        </a>
       </p>
       <p>
        <b>
         四、Apache Spark的工作模式
        </b>
       </p>
       <p>
        Spark引擎提供了在集群中所有主机上进行分布式内存数据处理的能力，下图显示了一个典型Spark job的处理流程。
       </p>
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f90f39a6cc_middle.jpg?_=45967" alt="典型Spark job的处理流程"/>
       </p>
       <p>
        下图显示了Apache Spark如何在集群中执行一个作业。
       </p>
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f91044aed3_middle.jpg?_=62738" alt="Apache Spark在集群中执行作业的示意图"/>
       </p>
       <p>
        Master控制数据如何被分割，利用了数据本地性，并在Slaves上跟踪所有分布式计算。在某个Slave不可用时，其存储的数据会分配给其他可用的Slaves。虽然当下（1.0.1版本）Master还存在单点故障，但后期必然会被修复。
       </p>
       <p>
        <b>
         五、弹性分布式数据集（Resilient Distributed Dataset，RDD）
        </b>
       </p>
       <p>
        弹性分布式数据集（RDD，从Spark 1.3版本开始已被DataFrame替代）是Apache Spark的核心理念。它是由数据组成的不可变分布式集合，其主要进行两个操作：transformation和action。Transformation是类似在RDD上做 filter()、map()或union() 以生成另一个RDD的操作，而action则是count()、first()、take(n)、collect() 等触发一个计算并返回值到Master或者稳定存储系统的操作。Transformations一般都是lazy的，直到action执行后才会被执行。Spark Master/Driver会保存RDD上的Transformations。这样一来，如果某个RDD丢失（也就是slaves宕掉），它可以快速和便捷地在集群中存活的主机上重建。这也就是RDD的弹性所在。
       </p>
       <p>
        下图展示了Transformation的lazy：
       </p>
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f9118e2de1_middle.jpg?_=18222" alt="Transformation的lazy特性示意图"/>
       </p>
       <p>
        我们可以通过下面示例来理解这个概念：从文本中发现5个最常用的word。下图显示了一个可能的解决方案。
       </p>
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f91285c06c_middle.jpg?_=34009" alt="从文本中发现5个最常用word的一个可能的解决方案"/>
       </p>
       <p>
        在上面命令中，我们对文本进行读取并且建立字符串的RDD。每个条目代表了文本中的1行。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da63c617125245" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          scala&gt; val hamlet = sc.textFile("/Users/akuntamukkala/temp/gutenburg.txt")
hamlet: org.apache.spark.rdd.RDD[String] = MappedRDD[1] at textFile at &lt;console&gt;:12
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da63c617125245-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da63c617125245-2">
               2
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da63c617125245-1">
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                hamlet
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                sc
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                textFile
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                Users
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                temp
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                gutenburg
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-i">
                txt
               </span>
               "
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da63c617125245-2">
               <span class="crayon-v">
                hamlet
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                org
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                apache
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                rdd
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                RDD
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-t">
                String
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                MappedRDD
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-cn">
                1
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                at
               </span>
               <span class="crayon-e">
                textFile
               </span>
               <span class="crayon-v">
                at
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                &lt;
               </span>
               <span class="crayon-v">
                console
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-cn">
                12
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0026 seconds] -->
       <p>
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da643561546358" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          scala&gt; val topWordCount = hamlet.flatMap(str=&gt;str.split(" ")).filter(!_.isEmpty).map(word=&gt;(word,1)).reduceByKey(_+_).map{case (word, count) =&gt; (count, word)}.sortByKey(false)
topWordCount: org.apache.spark.rdd.RDD[(Int, String)] = MapPartitionsRDD[10] at sortByKey at &lt;console&gt;:14
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da643561546358-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da643561546358-2">
               2
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da643561546358-1">
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                topWordCount
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                hamlet
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                flatMap
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                str
               </span>
               <span class="crayon-o">
                =&gt;
               </span>
               <span class="crayon-v">
                str
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                split
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-h">
               </span>
               "
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                filter
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-o">
                !
               </span>
               <span class="crayon-v">
                _
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                isEmpty
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                map
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-t">
                word
               </span>
               <span class="crayon-o">
                =&gt;
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-t">
                word
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-cn">
                1
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                reduceByKey
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                _
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-v">
                _
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                map
               </span>
               <span class="crayon-sy">
                {
               </span>
               <span class="crayon-st">
                case
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-t">
                word
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                count
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =&gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                count
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-t">
                word
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                }
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                sortByKey
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-t">
                false
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da643561546358-2">
               <span class="crayon-v">
                topWordCount
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                org
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                apache
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                rdd
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                RDD
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-t">
                Int
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-t">
                String
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                MapPartitionsRDD
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-cn">
                10
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                at
               </span>
               <span class="crayon-e">
                sortByKey
               </span>
               <span class="crayon-v">
                at
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                &lt;
               </span>
               <span class="crayon-v">
                console
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-cn">
                14
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0044 seconds] -->
       <p>
        <b>
         1.
        </b>
        通过上述命令我们可以发现这个操作非常简单——通过简单的Scala API来连接transformations和actions。
       </p>
       <p>
        <b>
         2.
        </b>
        可能存在某些words被1个以上空格分隔的情况，导致有些words是空字符串，因此需要使用filter(!_.isEmpty)将它们过滤掉。
       </p>
       <p>
        <b>
         3.
        </b>
        每个word都被映射成一个键值对：map(word=&gt;(word,1))。
       </p>
       <p>
        <b>
         4.
        </b>
        为了合计所有计数，这里需要调用一个reduce步骤——reduceByKey(_+_)。 _+_ 是一个简写函数，用于对每个key的值求和。
       </p>
       <p>
        <b>
         5.
        </b>
        我们得到了words以及各自的counts，下一步需要做的是根据counts排序。在Apache Spark中，用户只能根据key排序，而不是值。因此，这里需要使用map{case (word, count) =&gt; (count, word)}将(word, count)转换为(count, word)。
       </p>
       <p>
        <b>
         6.
        </b>
        需要计算最常用的5个words，因此需要使用sortByKey(false)做一个计数的递减排序。
       </p>
       <p>
        上述命令包含了一个.take(5)（一个action操作，会触发计算），并输出 /Users/akuntamukkala/temp/gutenburg.txt文本中5个最常用的words。在Python shell中用户可以实现同样的功能。
       </p>
       <p>
        RDD lineage可以通过toDebugString（一个值得记住的操作）来跟踪。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da64b889118967" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          scala&gt; topWordCount.take(5).foreach(x=&gt;println(x))
(1044,the)
(730,and)
(679,of)
(648,to)
(511,I)
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da64b889118967-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da64b889118967-2">
               2
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da64b889118967-3">
               3
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da64b889118967-4">
               4
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da64b889118967-5">
               5
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da64b889118967-6">
               6
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da64b889118967-1">
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                topWordCount
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                take
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                5
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-st">
                foreach
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                x
               </span>
               <span class="crayon-o">
                =&gt;
               </span>
               <span class="crayon-e">
                println
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                x
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da64b889118967-2">
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                1044
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-v">
                the
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da64b889118967-3">
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                730
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-st">
                and
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da64b889118967-4">
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                679
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-v">
                of
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da64b889118967-5">
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                648
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-st">
                to
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da64b889118967-6">
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                511
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-v">
                I
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0018 seconds] -->
       <p>
        <b>
         常用的Transformations：
        </b>
       </p>
       <table cellpadding="0" cellspacing="0">
        <thead>
         <tr>
          <th class='\"left_td_colored\"'>
           Transformation &amp; Purpose
          </th>
          <th class='\"left_td_colored\"'>
           Example &amp; Result
          </th>
         </tr>
        </thead>
        <tbody>
         <tr>
          <td class='\"right_td_colored\"'>
           filter(func)
           <strong>
            Purpose:
           </strong>
           new RDD by selecting those data elements on which func returns true
          </td>
          <td class='\"right_td_colored\"'>
           scala&gt; val rdd = sc.parallelize(List("ABC","BCD","DEF")) scala&gt; val filtered = rdd.filter(_.contains("C")) scala&gt; filtered.collect()
           <strong>
            Result:
           </strong>
           <br/>
           Array[String] = Array(ABC, BCD)
          </td>
         </tr>
         <tr>
          <td class='\"left_td_colored\"'>
           map(func)
           <strong>
            Purpose:
           </strong>
           return new RDD by applying func on each data element
          </td>
          <td class='\"left_td_colored\"'>
           scala&gt; val rdd=sc.parallelize(List(1,2,3,4,5)) scala&gt; val times2 = rdd.map(_*2) scala&gt; times2.collect()
           <strong>
            Result:
           </strong>
           <br/>
           Array[Int] = Array(2, 4, 6, 8, 10)
          </td>
         </tr>
         <tr>
          <td class='\"right_td_colored\"'>
           flatMap(func)
           <strong>
            Purpose:
           </strong>
           Similar to map but func returns a Seq instead of a value. For example, mapping a sentence into a Seq of words
          </td>
          <td class='\"right_td_colored\"'>
           scala&gt; val rdd=sc.parallelize(List("Spark is awesome","It is fun")) scala&gt; val fm=rdd.flatMap(str=&gt;str.split(" ")) scala&gt; fm.collect()
           <strong>
            Result:
           </strong>
           <br/>
           Array[String] = Array(Spark, is, awesome, It, is, fun)
          </td>
         </tr>
         <tr>
          <td class='\"left_td_colored\"'>
           reduceByKey(func,[numTasks])
           <strong>
            Purpose:
           </strong>
           To aggregate values of a key using a function. “numTasks” is an optional parameter to specify number of reduce tasks
          </td>
          <td class='\"left_td_colored\"'>
           scala&gt; val word1=fm.map(word=&gt;(word,1)) scala&gt; val wrdCnt=word1.reduceByKey(_+_) scala&gt; wrdCnt.collect()
           <strong>
            Result:
           </strong>
           <br/>
           Array[(String, Int)] = Array((is,2), (It,1), (awesome,1), (Spark,1), (fun,1))
          </td>
         </tr>
         <tr>
          <td class='\"right_td_colored\"'>
           groupByKey([numTasks])
           <strong>
            Purpose:
           </strong>
           To convert (K,V) to (K,Iterable&lt;V&gt;)
          </td>
          <td class='\"right_td_colored\"'>
           scala&gt; val cntWrd = wrdCnt.map{case (word, count) =&gt; (count, word)} scala&gt; cntWrd.groupByKey().collect()
           <strong>
            Result:
           </strong>
           <br/>
           Array[(Int, Iterable[String])] = Array((1,ArrayBuffer(It, awesome, Spark, fun)), (2,ArrayBuffer(is)))
          </td>
         </tr>
         <tr>
          <td class='\"left_td_colored\"'>
           distinct([numTasks])
           <strong>
            Purpose:
           </strong>
           Eliminate duplicates from RDD
          </td>
          <td class='\"left_td_colored\"'>
           scala&gt; fm.distinct().collect()
           <strong>
            Result:
           </strong>
           <br/>
           Array[String] = Array(is, It, awesome, Spark, fun)
          </td>
         </tr>
        </tbody>
       </table>
       <p>
        <b>
         常用的集合操作：
        </b>
       </p>
       <table cellpadding="0" cellspacing="0">
        <thead>
         <tr>
          <th class='\"left_td_colored\"'>
           Transformation and Purpose
          </th>
          <th class='\"left_td_colored\"'>
           Example and Result
          </th>
         </tr>
        </thead>
        <tbody>
         <tr>
          <td class='\"right_td_colored\"'>
           union()
           <br/>
           <strong>
            Purpose:
           </strong>
           new RDD containing all elements from source RDD and argument.
          </td>
          <td class='\"right_td_colored\"'>
           scala&gt; val rdd1=sc.parallelize(List('A','B'))
           <br/>
           scala&gt; val rdd2=sc.parallelize(List('B','C'))
           <br/>
           scala&gt; rdd1.union(rdd2).collect()
           <br/>
           <strong>
            Result:
           </strong>
           <br/>
           Array[Char] = Array(A, B, B, C)
          </td>
         </tr>
         <tr>
          <td class='\"left_td_colored\"'>
           intersection()
           <br/>
           <strong>
            Purpose:
           </strong>
           new RDD containing only common elements from source RDD and argument.
          </td>
          <td class='\"left_td_colored\"'>
           Scala&gt; rdd1.intersection(rdd2).collect()
           <br/>
           <strong>
            Result:
           </strong>
           <br/>
           Array[Char] = Array(B)
          </td>
         </tr>
         <tr>
          <td class='\"right_td_colored\"'>
           cartesian()
           <br/>
           <strong>
            Purpose:
           </strong>
           new RDD cross product of all elements from source RDD and argument
          </td>
          <td class='\"right_td_colored\"'>
           Scala&gt; rdd1.cartesian(rdd2).collect()
           <br/>
           <strong>
            Result:
           </strong>
           <br/>
           Array[(Char, Char)] = Array((A,B), (A,C), (B,B), (B,C))
          </td>
         </tr>
         <tr>
          <td class='\"left_td_colored\"'>
           subtract()
           <br/>
           <strong>
            Purpose:
           </strong>
           new RDD created by removing data elements in source RDD in common with argument
          </td>
          <td class='\"left_td_colored\"'>
           scala&gt; rdd1.subtract(rdd2).collect()
           <strong>
            Result:
           </strong>
           <br/>
           Array[Char] = Array(A)
          </td>
         </tr>
         <tr>
          <td class='\"right_td_colored\"'>
           join(RDD,[numTasks])
           <br/>
           <strong>
            Purpose:
           </strong>
           When invoked on (K,V) and (K,W), this operation creates a new RDD of (K, (V,W))
          </td>
          <td class='\"right_td_colored\"'>
           scala&gt; val personFruit = sc.parallelize(Seq(("Andy", "Apple"), ("Bob", "Banana"), ("Charlie", "Cherry"), ("Andy","Apricot")))
           <br/>
           scala&gt; val personSE = sc.parallelize(Seq(("Andy", "Google"), ("Bob", "Bing"), ("Charlie", "Yahoo"), ("Bob","AltaVista")))
           <br/>
           scala&gt; personFruit.join(personSE).collect()
           <br/>
           <strong>
            Result:
           </strong>
           <br/>
           Array[(String, (String, String))] = Array((Andy,(Apple,Google)), (Andy,(Apricot,Google)), (Charlie,(Cherry,Yahoo)), (Bob,(Banana,Bing)), (Bob,(Banana,AltaVista)))
          </td>
         </tr>
         <tr>
          <td class='\"left_td_colored\"'>
           cogroup(RDD,[numTasks])
           <br/>
           <strong>
            Purpose:
           </strong>
           When invoked on (K,V) and (K,W), this operation creates a new RDD of (K, (Iterable&lt;V&gt;, Iterable&lt;W&gt;))
          </td>
          <td class='\"left_td_colored\"'>
           scala&gt; personFruit.cogroup(personSE).collect()
           <br/>
           <strong>
            Result:
           </strong>
           <br/>
           Array[(String, (Iterable[String], Iterable[String]))] = Array((Andy,(ArrayBuffer(Apple, Apricot),ArrayBuffer(Google))), (Charlie,(ArrayBuffer(Cherry),ArrayBuffer(Yahoo))), (Bob,(ArrayBuffer(Banana),ArrayBuffer(Bing, AltaVista))))
          </td>
         </tr>
        </tbody>
       </table>
       <p>
        更多transformations信息，请查看http://spark.apache.org/docs/latest/programming-guide.html#transformations
       </p>
       <p>
        <b>
         常用的actions
        </b>
       </p>
       <table cellpadding="0" cellspacing="0">
        <thead>
         <tr>
          <th class='\"left_td_colored\"'>
           Action &amp; Purpose
          </th>
          <th class='\"left_td_colored\"'>
           Example &amp; Result
          </th>
         </tr>
        </thead>
        <tbody>
         <tr>
          <td class='\"right_td_colored\"'>
           count()
           <strong>
            Purpose:
           </strong>
           get the number of data elements in the RDD
          </td>
          <td class='\"right_td_colored\"'>
           scala&gt; val rdd = sc.parallelize(List('A','B','c')) scala&gt; rdd.count()
           <strong>
            Result:
           </strong>
           <br/>
           Long = 3
          </td>
         </tr>
         <tr>
          <td class='\"left_td_colored\"'>
           collect()
           <strong>
            Purpose:
           </strong>
           get all the data elements in an RDD as an array
          </td>
          <td class='\"left_td_colored\"'>
           scala&gt; val rdd = sc.parallelize(List('A','B','c')) scala&gt; rdd.collect()
           <strong>
            Result:
           </strong>
           <br/>
           Array[Char] = Array(A, B, c)
          </td>
         </tr>
         <tr>
          <td class='\"right_td_colored\"'>
           reduce(func)
           <strong>
            Purpose:
           </strong>
           Aggregate the data elements in an RDD using this function which takes two arguments and returns one
          </td>
          <td class='\"right_td_colored\"'>
           scala&gt; val rdd = sc.parallelize(List(1,2,3,4)) scala&gt; rdd.reduce(_+_)
           <strong>
            Result:
           </strong>
           <br/>
           Int = 10
          </td>
         </tr>
         <tr>
          <td class='\"left_td_colored\"'>
           take (n)
           <strong>
            Purpose:
           </strong>
           Fetch first n data elements in an RDD. Computed by the driver program.
          </td>
          <td class='\"left_td_colored\"'>
           scala&gt; val rdd = sc.parallelize(List(1,2,3,4)) scala&gt; rdd.take(2)
           <strong>
            Result:
           </strong>
           <br/>
           Array[Int] = Array(1, 2)
          </td>
         </tr>
         <tr>
          <td class='\"right_td_colored\"'>
           foreach(func)
           <strong>
            Purpose:
           </strong>
           execute function for each data element in RDD. usually used to update an accumulator(discussed later) or interacting with external systems.
          </td>
          <td class='\"right_td_colored\"'>
           scala&gt; val rdd = sc.parallelize(List(1,2,3,4)) scala&gt; rdd.foreach(x=&gt;println("%s*10=%s".format(x,x*10)))
           <strong>
            Result:
           </strong>
           <br/>
           1*10=10 4*10=40 3*10=30 2*10=20
          </td>
         </tr>
         <tr>
          <td class='\"left_td_colored\"'>
           first()
           <strong>
            Purpose:
           </strong>
           retrieves the first data element in RDD. Similar to take(1)
          </td>
          <td class='\"left_td_colored\"'>
           scala&gt; val rdd = sc.parallelize(List(1,2,3,4)) scala&gt; rdd.first()
           <strong>
            Result:
           </strong>
           <br/>
           Int = 1
          </td>
         </tr>
         <tr>
          <td class='\"right_td_colored\"'>
           saveAsTextFile(path)
           <strong>
            Purpose:
           </strong>
           Writes the content of RDD to a text file or a set of text files to local file system/ HDFS
          </td>
          <td class='\"right_td_colored\"'>
           scala&gt; val hamlet = sc.textFile("/users/akuntamukkala/temp/gutenburg.txt") scala&gt; hamlet.filter(_.contains("Shakespeare")).saveAsTextFile("/users/akuntamukkala/temp/filtered")
           <strong>
            Result:
           </strong>
           <br/>
           akuntamukkala@localhost~/temp/filtered$ ls _SUCCESS part-00000 part-00001
          </td>
         </tr>
        </tbody>
       </table>
       <p>
        更多actions参见http://spark.apache.org/docs/latest/programming-guide.html#actions
       </p>
       <p>
        <b>
         六、RDD持久性
        </b>
       </p>
       <p>
        Apache Spark中一个主要的能力就是在集群内存中持久化/缓存RDD。这将显著地提升交互速度。下表显示了Spark中各种选项。
       </p>
       <table cellpadding="0" cellspacing="0">
        <thead>
         <tr>
          <th class='\"left_td_colored\"'>
           Storage Level
          </th>
          <th class='\"left_td_colored\"'>
           Purpose
          </th>
         </tr>
        </thead>
        <tbody>
         <tr>
          <td class='\"right_td_colored\"'>
           MEMORY_ONLY (Default level)
          </td>
          <td class='\"right_td_colored\"'>
           This option stores RDD in available cluster memory as deserialized Java objects. Some partitions may not be cached if there is not enough cluster memory. Those partitions will be recalculated on the fly as needed.
          </td>
         </tr>
         <tr>
          <td class='\"left_td_colored\"'>
           MEMORY_AND_DISK
          </td>
          <td class='\"left_td_colored\"'>
           This option stores RDD as deserialized Java objects. If RDD does not fit in cluster memory, then store those partitions on the disk and read them as needed.
          </td>
         </tr>
         <tr>
          <td class='\"right_td_colored\"'>
           MEMORY_ONLY_SER
          </td>
          <td class='\"right_td_colored\"'>
           This option stores RDD as serialized Java objects (one byte array per partition). This is more CPU intensive but saves memory as it is more space efficient. Some partitions may not be cached. Those will be recalculated on the fly as needed.
          </td>
         </tr>
         <tr>
          <td class='\"left_td_colored\"'>
           MEMORY_AND_DISK_SER
          </td>
          <td class='\"left_td_colored\"'>
           This option is same as above except that disk is used when memory is not sufficient.
          </td>
         </tr>
         <tr>
          <td class='\"right_td_colored\"'>
           DISK_ONLY
          </td>
          <td class='\"right_td_colored\"'>
           This option stores the RDD only on the disk
          </td>
         </tr>
         <tr>
          <td class='\"left_td_colored\"'>
           MEMORY_ONLY_2, MEMORY_AND_DISK_2, etc.
          </td>
          <td class='\"left_td_colored\"'>
           Same as other levels but partitions are replicated on 2 slave nodes
          </td>
         </tr>
        </tbody>
       </table>
       <p>
        上面的存储等级可以通过在RDD上调用persist()操作来指定，而cache()操作则是指定MEMORY_ONLY选项的一种便捷方式。关于持久化等级的更多信息，可以访问这里http://spark.apache.org/docs/latest/programming-guide.html#rdd-persistence。
       </p>
       <p>
        Spark使用Least Recently Used (LRU)算法来移除缓存中旧的、不常用的RDD，从而释放出更多可用内存。同样还提供了一个unpersist() 操作来强制移除缓存/持久化的RDD。
       </p>
       <p>
        <b>
         七、变量共享
        </b>
       </p>
       <p>
        <b>
         Accumulators。
        </b>
        Spark提供了一个非常便捷的途径来避免可变的计数器和计数器同步问题——Accumulators。Accumulators在一个Spark context中通过默认值初始化，这些计数器在Slaves节点上可用，但是Slaves节点不能对其进行读取。它们的作用就是来获取原子更新，并将其转发到Master。Master是唯一可以读取和计算所有更新合集的节点。举个例子：
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da65e500629538" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          akuntamukkala@localhost~/temp$ cat output.log
error
warning
info
trace
error
info
info
scala&gt; val nErrors=sc.accumulator(0.0)
scala&gt; val logs = sc.textFile("/Users/akuntamukkala/temp/output.log")
scala&gt; logs.filter(_.contains("error")).foreach(x=&gt;nErrors+=1)
scala&gt; nErrors.value
Result:Int = 2
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da65e500629538-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da65e500629538-2">
               2
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da65e500629538-3">
               3
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da65e500629538-4">
               4
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da65e500629538-5">
               5
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da65e500629538-6">
               6
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da65e500629538-7">
               7
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da65e500629538-8">
               8
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da65e500629538-9">
               9
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da65e500629538-10">
               10
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da65e500629538-11">
               11
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da65e500629538-12">
               12
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da65e500629538-13">
               13
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da65e500629538-1">
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-sy">
                @
               </span>
               <span class="crayon-v">
                localhost
               </span>
               <span class="crayon-o">
                ~
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                temp
               </span>
               <span class="crayon-sy">
                $
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                cat
               </span>
               <span class="crayon-v">
                output
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                log
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da65e500629538-2">
               <span class="crayon-e">
                error
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da65e500629538-3">
               <span class="crayon-e">
                warning
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da65e500629538-4">
               <span class="crayon-e">
                info
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da65e500629538-5">
               <span class="crayon-e">
                trace
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da65e500629538-6">
               <span class="crayon-e">
                error
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da65e500629538-7">
               <span class="crayon-e">
                info
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da65e500629538-8">
               <span class="crayon-e">
                info
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da65e500629538-9">
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                nErrors
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-v">
                sc
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                accumulator
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                0.0
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da65e500629538-10">
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                logs
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                sc
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                textFile
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                Users
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                temp
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                output
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-i">
                log
               </span>
               "
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da65e500629538-11">
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                logs
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                filter
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                _
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                contains
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-i">
                error
               </span>
               "
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-st">
                foreach
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                x
               </span>
               <span class="crayon-o">
                =&gt;
               </span>
               <span class="crayon-v">
                nErrors
               </span>
               <span class="crayon-o">
                +=
               </span>
               <span class="crayon-cn">
                1
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da65e500629538-12">
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                nErrors
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                value
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da65e500629538-13">
               <span class="crayon-v">
                Result
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-t">
                Int
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                2
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0037 seconds] -->
       <p>
        <b>
         Broadcast Variables。
        </b>
        实际生产中，通过指定key在RDDs上对数据进行合并的场景非常常见。在这种情况下，很可能会出现给slave nodes发送大体积数据集的情况，让其负责托管需要做join的数据。因此，这里很可能存在巨大的性能瓶颈，因为网络IO比内存访问速度慢100倍。为了解决这个问题，Spark提供了Broadcast Variables，如其名称一样，它会向slave nodes进行广播。因此，节点上的RDD操作可以快速访问Broadcast Variables值。举个例子，期望计算一个文件中所有行项目（line items）的运输成本。通过一个look-up table指定每种运输类型的成本，这个look-up table就可以作为Broadcast Variables。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da666116100872" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          akuntamukkala@localhost~/temp$ cat packagesToShip.txt ground
express
media
priority
priority
ground
express
media
scala&gt; val map = sc.parallelize(Seq(("ground",1),("media",2), ("priority",5),("express",10))).collect().toMap
map: scala.collection.immutable.Map[String,Int] = Map(ground -&gt; 1, media -&gt; 2, priority -&gt; 5, express -&gt; 10)
scala&gt; val bcMailRates = sc.broadcast(map)
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da666116100872-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da666116100872-2">
               2
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da666116100872-3">
               3
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da666116100872-4">
               4
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da666116100872-5">
               5
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da666116100872-6">
               6
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da666116100872-7">
               7
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da666116100872-8">
               8
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da666116100872-9">
               9
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da666116100872-10">
               10
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da666116100872-11">
               11
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da666116100872-1">
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-sy">
                @
               </span>
               <span class="crayon-v">
                localhost
               </span>
               <span class="crayon-o">
                ~
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                temp
               </span>
               <span class="crayon-sy">
                $
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                cat
               </span>
               <span class="crayon-v">
                packagesToShip
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                txt
               </span>
               <span class="crayon-e">
                ground
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da666116100872-2">
               <span class="crayon-e">
                express
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da666116100872-3">
               <span class="crayon-e">
                media
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da666116100872-4">
               <span class="crayon-e">
                priority
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da666116100872-5">
               <span class="crayon-e">
                priority
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da666116100872-6">
               <span class="crayon-e">
                ground
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da666116100872-7">
               <span class="crayon-e">
                express
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da666116100872-8">
               <span class="crayon-e">
                media
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da666116100872-9">
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                map
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                sc
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                parallelize
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-e">
                Seq
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-i">
                ground
               </span>
               "
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-cn">
                1
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-i">
                media
               </span>
               "
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-cn">
                2
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-i">
                priority
               </span>
               "
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-cn">
                5
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-i">
                express
               </span>
               "
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-cn">
                10
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                collect
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                toMap
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da666116100872-10">
               <span class="crayon-v">
                map
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                collection
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                immutable
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                Map
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-t">
                String
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-t">
                Int
               </span>
               <span class="crayon-sy">
                ]
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                Map
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                ground
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                -&gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                1
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                media
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                -&gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                2
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                priority
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                -&gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                5
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                express
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                -&gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                10
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da666116100872-11">
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                bcMailRates
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                sc
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                broadcast
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                map
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0051 seconds] -->
       <p>
        上述命令中，我们建立了一个broadcast variable，它是一个按服务类别记录成本的map。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da66d096378730" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          scala&gt; val pts = sc.textFile("/Users/akuntamukkala/temp/packagesToShip.txt")
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da66d096378730-1">
               1
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da66d096378730-1">
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                pts
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                sc
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                textFile
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                Users
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                temp
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                packagesToShip
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-i">
                txt
               </span>
               "
               <span class="crayon-sy">
                )
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0011 seconds] -->
       <p>
        在下面的命令中，我们通过在broadcast variable中查找mailing rates来计算运输成本。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da673417161646" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          scala&gt; pts.map(shipType=&gt;(shipType,1)).reduceByKey(_+_). map{case (shipType,nPackages)=&gt;(shipType,nPackages*bcMailRates. value(shipType))}.collect()
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da673417161646-1">
               1
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da673417161646-1">
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-o">
                &gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                pts
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                map
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                shipType
               </span>
               <span class="crayon-o">
                =&gt;
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                shipType
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-cn">
                1
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                reduceByKey
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                _
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-v">
                _
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                map
               </span>
               <span class="crayon-sy">
                {
               </span>
               <span class="crayon-st">
                case
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                shipType
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-v">
                nPackages
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-o">
                =&gt;
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                shipType
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-e ">
                nPackages*
               </span>
               <span class="crayon-v">
                bcMailRates
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                value
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                shipType
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                }
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                collect
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0021 seconds] -->
       <p>
        通过上述命令，我们使用accumulator来累加所有运输的成本。详细信息可通过下面的PDF查看http://ampcamp.berkeley.edu/wp-content/uploads/2012/06/matei-zaharia-amp-camp-2012-advanced-spark.pdf。
       </p>
       <p>
        <b>
         八、Spark SQL
        </b>
       </p>
       <p>
        通过Spark Engine，Spark SQL提供了一个便捷的途径来进行交互式分析，使用一个被称为SchemaRDD类型的RDD。SchemaRDD可以通过已有RDDs建立，或者其他外部数据格式，比如Parquet files、JSON数据，或者在Hive上运行HQL。SchemaRDD非常类似于RDBMS中的表格。一旦数据被导入SchemaRDD，Spark引擎就可以对它进行批或流处理。Spark SQL提供了两种类型的Contexts——SQLContext和HiveContext，扩展了SparkContext的功能。
       </p>
       <p>
        SQLContext提供了到简单SQL parser的访问，而HiveContext则提供了到HiveQL parser的访问。HiveContext允许企业利用已有的Hive基础设施。
       </p>
       <p>
        这里看一个简单的SQLContext示例。
       </p>
       <p>
        下面文本中的用户数据通过“|”来分割。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da67a903673122" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          John Smith|38|M|201 East Heading Way #2203,Irving, TX,75063
Liana Dole|22|F|1023 West Feeder Rd, Plano,TX,75093
Craig Wolf|34|M|75942 Border Trail,Fort Worth,TX,75108
John Ledger|28|M|203 Galaxy Way,Paris, TX,75461
Joe Graham|40|M|5023 Silicon Rd,London,TX,76854
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da67a903673122-1">
               1
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da67a903673122-1">
               <span class="crayon-e">
                John
               </span>
               <span class="crayon-v">
                Smith
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-cn">
                38
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-v">
                M
               </span>
               <span class="crayon-o">
                |
               </span>
               <span class="crayon-cn">
                201
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                East
               </span>
               <span class="crayon-e">
                Heading
               </span>
               <span class="crayon-v">
                Way
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-p">
                #2203,Irving, TX,75063 Liana Dole|22|F|1023 West Feeder Rd, Plano,TX,75093 Craig Wolf|34|M|75942 Border Trail,Fort Worth,TX,75108 John Ledger|28|M|203 Galaxy Way,Paris, TX,75461 Joe Graham|40|M|5023 Silicon Rd,London,TX,76854
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0009 seconds] -->
       <p>
        定义Scala case class来表示每一行：
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da680935456945" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          case class Customer(name:String,age:Int,gender:String,address: String)
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da680935456945-1">
               1
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da680935456945-1">
               <span class="crayon-st">
                case
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-t">
                class
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                Customer
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                name
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-t">
                String
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-v">
                age
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-t">
                Int
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-v">
                gender
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-t">
                String
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-v">
                address
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-t">
                String
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0011 seconds] -->
       <p>
        下面的代码片段体现了如何使用SparkContext来建立SQLContext，读取输入文件，将每一行都转换成SchemaRDD中的一条记录，并通过简单的SQL语句来查询30岁以下的男性用户。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da686338932395" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          val sparkConf = new SparkConf().setAppName("Customers")
val sc = new SparkContext(sparkConf)
val sqlContext = new SQLContext(sc)
val r = sc.textFile("/Users/akuntamukkala/temp/customers.txt"); val records = r.map(_.split('|'))
val c = records.map(r=&gt;Customer(r(0),r(1).trim.toInt,r(2),r(3))); c.registerAsTable("customers")
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da686338932395-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da686338932395-2">
               2
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da686338932395-3">
               3
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da686338932395-4">
               4
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da686338932395-5">
               5
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da686338932395-1">
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                sparkConf
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-r">
                new
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                SparkConf
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                setAppName
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-i">
                Customers
               </span>
               "
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da686338932395-2">
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                sc
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-r">
                new
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                SparkContext
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                sparkConf
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da686338932395-3">
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                sqlContext
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-r">
                new
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                SQLContext
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                sc
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da686338932395-4">
               <span class="crayon-i">
                val
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                r
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                sc
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                textFile
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                Users
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                akuntamukkala
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                temp
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                customers
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-i">
                txt
               </span>
               "
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                records
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                r
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                map
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                _
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                split
               </span>
               <span class="crayon-sy">
                (
               </span>
               '
               <span class="crayon-o">
                |
               </span>
               '
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da686338932395-5">
               <span class="crayon-i">
                val
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                c
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                records
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                map
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                r
               </span>
               <span class="crayon-o">
                =&gt;
               </span>
               <span class="crayon-e">
                Customer
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-e">
                r
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                0
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-e">
                r
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                1
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                trim
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                toInt
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-e">
                r
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                2
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-e">
                r
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                3
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                c
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                registerAsTable
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-i">
                customers
               </span>
               "
               <span class="crayon-sy">
                )
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0048 seconds] -->
       <p>
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da68c657670950" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          sqlContext.sql("select * from customers where gender='M' and age &lt;
            30").collect().foreach(println) Result:[John Ledger,28,M,203 Galaxy Way,Paris,
            TX,75461]
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da68c657670950-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da68c657670950-2">
               2
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da68c657670950-3">
               3
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da68c657670950-1">
               <span class="crayon-v">
                sqlContext
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                sql
               </span>
               <span class="crayon-sy">
                (
               </span>
               “
               <span class="crayon-e ">
                select *
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                from
               </span>
               <span class="crayon-e">
                customers
               </span>
               <span class="crayon-e">
                where
               </span>
               <span class="crayon-v">
                gender
               </span>
               <span class="crayon-o">
                =
               </span>
               ’
               <span class="crayon-i">
                M
               </span>
               ’
               <span class="crayon-h">
               </span>
               <span class="crayon-st">
                and
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                age
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                &lt;
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da68c657670950-2">
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                30
               </span>
               ”
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                collect
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-st">
                foreach
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                println
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                Result
               </span>
               <span class="crayon-o">
                :
               </span>
               <span class="crayon-sy">
                [
               </span>
               <span class="crayon-e">
                John
               </span>
               <span class="crayon-v">
                Ledger
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-cn">
                28
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-v">
                M
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-cn">
                203
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                Galaxy
               </span>
               <span class="crayon-v">
                Way
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-v">
                Paris
               </span>
               <span class="crayon-sy">
                ,
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da68c657670950-3">
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                TX
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-cn">
                75461
               </span>
               <span class="crayon-sy">
                ]
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0023 seconds] -->
       <p>
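        更多使用SQL和HiveQL的示例请访问下面的链接：<a href="https://spark.apache.org/docs/latest/sql-programming-guide.html">https://spark.apache.org/docs/latest/sql-programming-guide.html</a>、<a href="https://databricks-training.s3.amazonaws.com/data-exploration-using-spark-sql.html">https://databricks-training.s3.amazonaws.com/data-exploration-using-spark-sql.html</a>。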
       </p>
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f93b9e37a7_middle.jpg?_=44672"/>
       </p>
       <p>
        <b>
         九、Spark Streaming
        </b>
       </p>
       <p>
        Spark Streaming提供了一个可扩展、容错、高效的途径来处理流数据，同时还利用了Spark的简易编程模型。从真正意义上讲，Spark Streaming会将流数据转换成micro batches，从而将Spark批处理编程模型应用到流用例中。这种统一的编程模型让Spark可以很好地整合批量处理和交互式流分析。下图显示了Spark Streaming可以从不同数据源中读取数据进行分析。
       </p>
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f93e2a18e5_middle.jpg?_=11621" alt="Spark Streaming从不同数据源读取数据进行分析的示意图"/>
       </p>
       <p>
        Spark Streaming中的核心抽象是Discretized Stream（DStream）。DStream由一组RDD组成，每个RDD都包含了规定时间（可配置）流入的数据。图12很好地展示了Spark Streaming如何将流入数据转换成一系列的RDDs，再转换成DStream。每个RDD都包含两秒（设定的区间长度）的数据。在Spark Streaming中，最小长度可以设置为0.5秒，因此处理延时可以达到1秒以下。
       </p>
       <p>
        Spark Streaming同样提供了window operators，它有助于更有效率地在一组RDD（a rolling window of time）上进行计算。同时，DStream还提供了一个API，其操作符（transformations和output operators）可以帮助用户直接操作RDD。下面不妨看一个包含在Spark Streaming下载包中的简单示例。示例是在Twitter流中找出趋势hashtags，详见下面代码。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da694609184904" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          spark-1.0.1/examples/src/main/scala/org/apache/spark/examples/streaming/TwitterPopularTags.scala
val sparkConf = new SparkConf().setAppName("TwitterPopularTags")
val ssc = new StreamingContext(sparkConf, Seconds(2))
val stream = TwitterUtils.createStream(ssc, None, filters)
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da694609184904-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da694609184904-2">
               2
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da694609184904-3">
               3
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da694609184904-4">
               4
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da694609184904-1">
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                -
               </span>
               <span class="crayon-cn">
                1.0.1
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                examples
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                src
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                main
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                scala
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                org
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                apache
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                spark
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                examples
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                streaming
               </span>
               <span class="crayon-o">
                /
               </span>
               <span class="crayon-v">
                TwitterPopularTags
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                scala
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da694609184904-2">
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                sparkConf
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-r">
                new
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                SparkConf
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                setAppName
               </span>
               <span class="crayon-sy">
                (
               </span>
               “
               <span class="crayon-i">
                TwitterPopularTags
               </span>
               ”
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da694609184904-3">
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                ssc
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-r">
                new
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                StreamingContext
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                sparkConf
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                Seconds
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                2
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da694609184904-4">
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                stream
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                TwitterUtils
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                createStream
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                ssc
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                None
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                filters
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0030 seconds] -->
       <p>
        上述代码用于建立Spark Streaming Context。Spark Streaming将在DStream中建立一个RDD，包含了每2秒流入的tweets。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da69b128944794" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          val hashTags = stream.flatMap(status =&gt; status.getText.split(" ").filter(_.startsWith("#")))
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da69b128944794-1">
               1
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da69b128944794-1">
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                hashTags
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                stream
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                flatMap
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                status
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =&gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                status
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                getText
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                split
               </span>
               <span class="crayon-sy">
                (
               </span>
               “
               <span class="crayon-h">
               </span>
               “
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                filter
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                _
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                startsWith
               </span>
               <span class="crayon-sy">
                (
               </span>
               “
               <span class="crayon-p">
                #”)))
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0015 seconds] -->
       <p>
        上述代码片段将Tweet转换成一组words，并过滤出所有以#开头的words。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da6a1827519569" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          val topCounts60 = hashTags.map((_, 1)).reduceByKeyAndWindow(_ + _, Seconds(60)).map{case (topic, count) =&gt; (count, topic)}. transform(_.sortByKey(false))
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da6a1827519569-1">
               1
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da6a1827519569-1">
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                topCounts60
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                hashTags
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                map
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                _
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                1
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                reduceByKeyAndWindow
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                _
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                +
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                _
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                Seconds
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                60
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                map
               </span>
               <span class="crayon-sy">
                {
               </span>
               <span class="crayon-st">
                case
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                topic
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                count
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =&gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                count
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                topic
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                }
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                transform
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                _
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                sortByKey
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-t">
                false
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0025 seconds] -->
       <p>
        上述代码展示了如何整合计算60秒内一个hashtag流入的总次数。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da6a7350606735" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          topCounts60.foreachRDD(rdd =&gt; {
val topList = rdd.take(10)
println("\nPopular topics in last 60 seconds (%s total):".format(rdd.count()))
topList.foreach{case (count, tag) =&gt; println("%s (%s tweets)".format(tag, count))}
})
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da6a7350606735-1">
               1
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da6a7350606735-2">
               2
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da6a7350606735-3">
               3
              </div>
              <div class="crayon-num crayon-striped-num" data-line="crayon-57685da9da6a7350606735-4">
               4
              </div>
              <div class="crayon-num" data-line="crayon-57685da9da6a7350606735-5">
               5
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da6a7350606735-1">
               <span class="crayon-v">
                topCounts60
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                foreachRDD
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                rdd
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =&gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                {
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da6a7350606735-2">
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                topList
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                rdd
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                take
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-cn">
                10
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da6a7350606735-3">
               <span class="crayon-e">
                println
               </span>
               <span class="crayon-sy">
                (
               </span>
               “
               <span class="crayon-sy">
                \
               </span>
               <span class="crayon-e">
                nPopular
               </span>
               <span class="crayon-e">
                topics
               </span>
               <span class="crayon-st">
                in
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-i">
                last
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                60
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                seconds
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-o">
                %
               </span>
               <span class="crayon-i">
                s
               </span>
              </div>
              <div class="crayon-line crayon-striped-line" id="crayon-57685da9da6a7350606735-4">
               <span class="crayon-v">
                total
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-o">
                :
               </span>
               ”
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                format
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                rdd
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                count
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                topList
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-st">
                foreach
               </span>
               <span class="crayon-sy">
                {
               </span>
               <span class="crayon-st">
                case
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                count
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                tag
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =&gt;
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-e">
                println
               </span>
               <span class="crayon-sy">
                (
               </span>
               “
               <span class="crayon-o">
                %
               </span>
               <span class="crayon-e">
                s
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-o">
                %
               </span>
               <span class="crayon-i">
                s
               </span>
              </div>
              <div class="crayon-line" id="crayon-57685da9da6a7350606735-5">
               <span class="crayon-v">
                tweets
               </span>
               <span class="crayon-sy">
                )
               </span>
               ”
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                format
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-v">
                tag
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                count
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                )
               </span>
               <span class="crayon-sy">
                }
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-sy">
                }
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0036 seconds] -->
       <p>
        上面代码将找出top 10趋势tweets，然后将其打印。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da6ad657196025" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          ssc.start()
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da6ad657196025-1">
               1
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da6ad657196025-1">
               <span class="crayon-v">
                ssc
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                start
               </span>
               <span class="crayon-sy">
                (
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0005 seconds] -->
       <p>
        上述代码让Spark Streaming Context 开始检索tweets。一起聚焦一些常用操作，假设我们正在从一个socket中读入流文本。
       </p>
       <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
       <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-57685da9da6b3899068401" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
        <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
         <span class="crayon-title">
         </span>
         <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-wrap-button" title="切换自动换行">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-expand-button" title="点击展开代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-copy-button" title="复制代码">
           <div class="crayon-button-icon">
           </div>
          </div>
          <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
           <div class="crayon-button-icon">
           </div>
          </div>
         </div>
        </div>
        <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
        </div>
        <div class="crayon-plain-wrap">
         <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
          val lines = ssc.socketTextStream("localhost", 9999, StorageLevel.MEMORY_AND_DISK_SER)
         </textarea>
        </div>
        <div class="crayon-main" style="">
         <table class="crayon-table">
          <tbody>
           <tr class="crayon-row">
            <td class="crayon-nums " data-settings="show">
             <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
              <div class="crayon-num" data-line="crayon-57685da9da6b3899068401-1">
               1
              </div>
             </div>
            </td>
            <td class="crayon-code">
             <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
              <div class="crayon-line" id="crayon-57685da9da6b3899068401-1">
               <span class="crayon-e">
                val
               </span>
               <span class="crayon-v">
                lines
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-o">
                =
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                ssc
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-e">
                socketTextStream
               </span>
               <span class="crayon-sy">
                (
               </span>
               "
               <span class="crayon-i">
                localhost
               </span>
               "
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-cn">
                9999
               </span>
               <span class="crayon-sy">
                ,
               </span>
               <span class="crayon-h">
               </span>
               <span class="crayon-v">
                StorageLevel
               </span>
               <span class="crayon-sy">
                .
               </span>
               <span class="crayon-v">
                MEMORY_AND_DISK_SER
               </span>
               <span class="crayon-sy">
                )
               </span>
              </div>
             </div>
            </td>
           </tr>
          </tbody>
         </table>
        </div>
       </div>
       <!-- [Format Time: 0.0011 seconds] -->
       <p>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f972b98c65_middle.jpg?_=1784"/>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f976483601_middle.jpg?_=58158"/>
        <img src="http://img.ptcms.csdn.net/article/201507/10/559f97d43c8bb_middle.jpg"/>
       </p>
       <p>
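        更多operators请访问
        <a href="http://spark.apache.org/docs/latest/streaming-programming-guide.html#transformations">
         http://spark.apache.org/docs/latest/streaming-programming-guide.html#transformations
        </a>
        。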
       </p>
       <p>
        Spark Streaming拥有大量强大的output operators，比如上文提到的 foreachRDD()，了解更多可访问
        <a href="http://spark.apache.org/docs/latest/streaming-programming-guide.html#output-operations">
         http://spark.apache.org/docs/latest/streaming-programming-guide.html#output-operations
        </a>
        。
       </p>
       <p>
        <b>
         十、附加学习资源
        </b>
       </p>
       <ul>
        <li>
         Wikipedia article (good):
         <a href="http://en.wikipedia.org/wiki/Apache_Spark">
          http://en.wikipedia.org/wiki/Apache_Spark
         </a>
        </li>
        <li>
         Launching a Spark cluster on EC2:
         <a href="http://ampcamp.berkeley.edu/exercises-strata-conf-2013/launching-a-cluster.html">
          http://ampcamp.berkeley.edu/exercises-strata-conf-2013/launching-a-cluster.html
         </a>
        </li>
        <li>
         Quick start:
         <a href="https://spark.apache.org/docs/1.0.1/quick-start.html">
          https://spark.apache.org/docs/1.0.1/quick-start.html
         </a>
        </li>
        <li>
         The Spark platform provides MLLib(machine learning) and GraphX(graph algorithms). The following links provide more information:
         <a href="https://spark.apache.org/docs/latest/mllib-guide.html">
          https://spark.apache.org/docs/latest/mllib-guide.html
         </a>
         、
         <a href="https://spark.apache.org/docs/1.0.1/graphx-programming-guide.html">
          https://spark.apache.org/docs/1.0.1/graphx-programming-guide.html
         </a>
         、
         <a href="https://dzone.com/refcardz/apache-spark">
          https://dzone.com/refcardz/apache-spark
         </a>
        </li>
       </ul>
       <p>
        原文链接：
        <a href="https://dzone.com/refcardz/apache-spark" target="_blank">
         Apache Spark：An Engine for Large-Scale Data Processing
        </a>
       </p>
      </div>
      <div>
       <strong>
        注：转载文章均来自于公开网络，仅供学习使用，不会用于任何商业用途，如果侵犯到原作者的权益，请您与我们联系删除或者授权事宜，联系邮箱：contact@dataunion.org。转载数盟网站文章请注明原文章作者，否则产生的任何版权纠纷与数盟无关。
       </strong>
      </div>
      <!--content_text-->
      <div class="fenxian">
       <!-- JiaThis Button BEGIN -->
       <div class="jiathis_style_32x32">
        <p class="jiathis_button_weixin">
        </p>
        <p class="jiathis_button_tsina">
        </p>
        <p class="jiathis_button_qzone">
        </p>
        <p class="jiathis_button_cqq">
        </p>
        <p class="jiathis_button_tumblr">
        </p>
        <a class="jiathis jiathis_txt jtico jtico_jiathis" href="http://www.jiathis.com/share" target="_blank">
        </a>
        <p class="jiathis_counter_style">
        </p>
       </div>
       <!-- JiaThis Button END -->
      </div>
     </article>
     <!--content-->
     <!--相关文章-->
     <div class="xianguan">
      <div class="xianguantitle">
       相关文章！
      </div>
      <ul class="pic">
       <li>
        <a href="http://dataunion.org/20824.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/09/t018630756a7e263b33-300x165.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20824.html" rel="bookmark" title="如何判断一笔交易是否属于欺诈？你只是需要一点数据挖掘">
         如何判断一笔交易是否属于欺诈？你只是需要一点数据挖掘
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20820.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/09/1-300x200.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20820.html" rel="bookmark" title="人们对Python在企业级开发中的10大误解">
         人们对Python在企业级开发中的10大误解
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20811.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/09/t0133fcacae8523307b_副本-300x200.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20811.html" rel="bookmark" title="大神亲传：26条深度学习的金科玉律！">
         大神亲传：26条深度学习的金科玉律！
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20808.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/09/640.webp-11-300x137.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20808.html" rel="bookmark" title="我们是如何在一张地图上表现86万个数据的">
         我们是如何在一张地图上表现86万个数据的
        </a>
       </li>
      </ul>
     </div>
     <!--相关文章-->
     <div class="comment" id="comments">
      <!-- You can start editing here. -->
      <!-- If comments are open, but there are no comments. -->
      <div class="title">
       期待你一针见血的评论，Come on！
      </div>
      <div id="respond">
       <p>
        不用想啦，马上
        <a href="http://dataunion.org/wp-login.php?redirect_to=http%3A%2F%2Fdataunion.org%2F20001.html">
         "登录"
        </a>
        发表自己的想法。
       </p>
      </div>
     </div>
     <!-- .nav-single -->
    </div>
    <!--Container End-->
    <aside id="sitebar">
     <div class="sitebar_list2">
      <div class="wptag">
       <span class="tagtitle">
        热门标签+
       </span>
       <div class="tagg">
        <ul class="menu" id="menu-%e5%8f%8b%e6%83%85%e9%93%be%e6%8e%a5">
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-1605" id="menu-item-1605">
          <a href="http://taidizh.com/">
           泰迪智慧
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-20884" id="menu-item-20884">
          <a href="http://www.transwarp.cn/">
           星环科技
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-3538" id="menu-item-3538">
          <a href="http://datall.org/">
           珈和遥感
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-20888" id="menu-item-20888">
          <a href="http://www.chinahadoop.cn/">
           小象学院
          </a>
         </li>
        </ul>
       </div>
      </div>
     </div>
     <div class="sitebar_list">
      <div class="textwidget">
       <div align="center">
        <a href="http://study.163.com/course/courseMain.htm?courseId=991022" target="_blank">
         <img src="http://dataunion.org/wp-content/uploads/2016/03/dv.jpg"/>
        </a>
       </div>
      </div>
     </div>
     <div class="sitebar_list">
      <h4 class="sitebar_title">
       文章分类
      </h4>
      <div class="tagcloud">
       <a class="tag-link-44" href="http://dataunion.org/category/industry/demo" style="font-size: 10.204724409449pt;" title="4个话题">
        Demo展示
       </a>
       <a class="tag-link-31" href="http://dataunion.org/category/experts" style="font-size: 15.826771653543pt;" title="52个话题">
        专家团队
       </a>
       <a class="tag-link-870" href="http://dataunion.org/category/tech/ai" style="font-size: 19.795275590551pt;" title="273个话题">
        人工智能
       </a>
       <a class="tag-link-488" href="http://dataunion.org/category/%e5%8a%a0%e5%85%a5%e6%95%b0%e7%9b%9f" style="font-size: 8pt;" title="1个话题">
        加入数盟
       </a>
       <a class="tag-link-869" href="http://dataunion.org/category/tech/viz" style="font-size: 17.204724409449pt;" title="93个话题">
        可视化
       </a>
       <a class="tag-link-30" href="http://dataunion.org/category/partners" style="font-size: 10.645669291339pt;" title="5个话题">
        合作伙伴
       </a>
       <a class="tag-link-889" href="http://dataunion.org/category/parterc" style="font-size: 11.582677165354pt;" title="8个话题">
        合作会议
       </a>
       <a class="tag-link-104" href="http://dataunion.org/category/books" style="font-size: 12.96062992126pt;" title="15个话题">
        图书
       </a>
       <a class="tag-link-220" href="http://dataunion.org/category/tech/base" style="font-size: 19.850393700787pt;" title="281个话题">
        基础架构
       </a>
       <a class="tag-link-219" href="http://dataunion.org/category/tech/analysis" style="font-size: 19.409448818898pt;" title="232个话题">
        数据分析
       </a>
       <a class="tag-link-887" href="http://dataunion.org/category/tech/dm" style="font-size: 13.291338582677pt;" title="17个话题">
        数据挖掘
       </a>
       <a class="tag-link-34" href="http://dataunion.org/category/tech" style="font-size: 20.732283464567pt;" title="404个话题">
        文章
       </a>
       <a class="tag-link-1" href="http://dataunion.org/category/uncategorized" style="font-size: 22pt;" title="693个话题">
        未分类
       </a>
       <a class="tag-link-4" href="http://dataunion.org/category/events" style="font-size: 14.503937007874pt;" title="29个话题">
        活动
       </a>
       <a class="tag-link-890" href="http://dataunion.org/category/tech/%e6%b7%b1%e5%ba%a6%e5%ad%a6%e4%b9%a0" style="font-size: 10.204724409449pt;" title="4个话题">
        深度学习
       </a>
       <a class="tag-link-221" href="http://dataunion.org/category/tech/devl" style="font-size: 18.968503937008pt;" title="193个话题">
        编程语言
       </a>
       <a class="tag-link-888" href="http://dataunion.org/category/career" style="font-size: 15.661417322835pt;" title="48个话题">
        职业规划
       </a>
       <a class="tag-link-5" href="http://dataunion.org/category/jobs" style="font-size: 14.11811023622pt;" title="25个话题">
        职位
       </a>
       <a class="tag-link-871" href="http://dataunion.org/category/industry" style="font-size: 15.716535433071pt;" title="49个话题">
        行业
       </a>
       <a class="tag-link-613" href="http://dataunion.org/category/industry/case" style="font-size: 16.984251968504pt;" title="84个话题">
        行业应用
       </a>
       <a class="tag-link-885" href="http://dataunion.org/category/industry/news" style="font-size: 17.425196850394pt;" title="102个话题">
        行业资讯
       </a>
       <a class="tag-link-10" href="http://dataunion.org/category/training" style="font-size: 14.228346456693pt;" title="26个话题">
        课程
       </a>
       <a class="tag-link-16" href="http://dataunion.org/category/sources" style="font-size: 15.661417322835pt;" title="48个话题">
        资源
       </a>
      </div>
     </div>
     <div class="sitebar_list">
      <h4 class="sitebar_title">
       功能
      </h4>
      <ul>
       <li>
        <a href="http://dataunion.org/wp-login.php?action=register">
         注册
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/wp-login.php">
         登录
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/feed">
         文章
         <abbr title="Really Simple Syndication">
          RSS
         </abbr>
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/comments/feed">
         评论
         <abbr title="Really Simple Syndication">
          RSS
         </abbr>
        </a>
       </li>
       <li>
        <a href="https://cn.wordpress.org/" title="基于WordPress，一个优美、先进的个人信息发布平台。">
         WordPress.org
        </a>
       </li>
      </ul>
     </div>
    </aside>
    <div class="clear">
    </div>
   </div>
   <!--main-->
   <footer id="dibu">
    <div class="about">
     <div class="right">
      <ul class="menu" id="menu-%e5%ba%95%e9%83%a8%e8%8f%9c%e5%8d%95">
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-18024" id="menu-item-18024">
        <a href="http://dataunion.org/category/partners">
         合作伙伴
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-20881" id="menu-item-20881">
        <a href="http://dataunion.org/contribute">
         文章投稿
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20872" id="menu-item-20872">
        <a href="http://dataunion.org/category/%e5%8a%a0%e5%85%a5%e6%95%b0%e7%9b%9f">
         加入数盟
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-22441" id="menu-item-22441">
        <a href="http://dataunion.org/f-links">
         友情链接
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-20874" id="menu-item-20874">
        <a href="http://dataunion.org/aboutus">
         关于数盟
        </a>
       </li>
      </ul>
      <p class="banquan">
       数盟社区，做最棒的数据科学社区
      </p>
     </div>
     <div class="left">
      <ul class="bottomlist">
       <li>
        <a href="http://weibo.com/DataScientistUnion" target="_blank" title="">
         <img src="http://dataunion.org/wp-content/themes/yzipi/images/weibo.png"/>
        </a>
       </li>
       <li>
        <a class="cd-popup-trigger" href="http://dataunion.org/20001.html#0">
         <img src="http://dataunion.org/wp-content/themes/yzipi/images/weixin.png"/>
        </a>
       </li>
      </ul>
      <div class="cd-popup">
       <div class="cd-popup-container">
        <h1>
         扫描二维码,加微信公众号
        </h1>
        <img src="http://dataunion.org/wp-content/themes/yzipi/images/2014-12-06-1515289049.png"/>
        <a class="cd-popup-close" href="http://dataunion.org/20001.html">
        </a>
       </div>
       <!-- cd-popup-container -->
      </div>
      <!-- cd-popup -->
     </div>
    </div>
    <!--about-->
    <div class="bottom">
     <a href="http://dataunion.org/">
      数盟社区
     </a>
     <a href="http://www.miitbeian.gov.cn/" rel="external nofollow" target="_blank">
      京ICP备14026740号
     </a>
     联系我们：
     <a href="mailto:contact@dataunion.org" target="_blank">
      contact@dataunion.org
     </a>
     <div class="tongji">
     </div>
     <!--bottom-->
     <div class="scroll" id="scroll" style="display:none;">
      ︿
     </div>
    </div>
   </footer>
   <!--dibu-->
  </div>
 </body>
</html>